####################################################################################

#Making the bourgeoisie? Values, voice, and state-provided homeownership
#File edited in 03/08/2022 to add path to dataset #line 813
#This file recodes variables and performs and saves the analyses for W2
#Figures and tables are created by other files
#See readme.txt for replication instructions

####################################################################################

rm(list=ls())
library(AER)
library(tidyverse)
library(stringi)
library(stringr)
library(readr)
library(readxl)
library(ggplot2)
library(lubridate)
library(sandwich)
library(ri)
library(RItools)
library(genderBR)
library(boot)
library(corrgram)
library(psych)
library(forcats)
library(estimatr)
library(xtable)
library(fastDummies)
library(DeclareDesign)
library(randomizr)
library(ANOVAreplication)
library(miceadds)
library(clubSandwich)
options(scipen=6)

setwd("~/Dropbox/Replication-Making-JoP/") #Set WD for replication
source("Code/functions.R") #auxiliary functions for analysis

####################################################################################
# Read W2 dataset (and perform changes)                                       ######
####################################################################################

load("Data/surveyW2-Making.Rda")
print(comment(survey))
survey <- as.data.frame(survey)

# Create index of attitudes
# Following the same structure for the individual indices
set.seed(1977)
attitudes_index_vars <- imp.mice(survey %>% select(tax_item,
                                                    red_abstract,
                                                    red_concrete,
                                                    effortbetter,
                                                    trustothers,
                                                    successalone,
                                                    moneyimportant,
                                                    govind, 
                                                    competition))
survey <- survey %>% bind_cols(
  attitudes_index = calculate_mean_effects_index(Z = survey$Z,
                     outcome_mat = attitudes_index_vars, greedy = TRUE))


# Create welfare index 
set.seed(1977)
welfare_index_vars <- imp.mice(survey %>% select(pea,inc_above1mw,placement))
survey <- survey %>% bind_cols(
  welfare_index = calculate_mean_effects_index(Z = survey$Z,
                  outcome_mat = welfare_index_vars, greedy = TRUE))

# Create index of collective action 
set.seed(1977)
col_action_index_vars <- imp.mice(survey %>% select(assoc,
                                                    talk_recent,
                                                    contact_gov,
                                                    contact_civsoc,
                                                    contact_collectively,
                                                    effectiveness_gov,
                                                    effectiveness_self))
survey <- survey %>% bind_cols(col_action_index = calculate_mean_effects_index(Z = survey$Z,
                                                 outcome_mat = col_action_index_vars, greedy = TRUE))

# Create index of mobilization 
set.seed(1977)
mobilization_index_vars <- imp.mice(survey %>% select(talk_cand,
                                                      client_support,
                                                      turnout_2016,
                                                      turnout_2018,
                                                      turnout_2020,
                                                      client_g))
survey <- survey %>% bind_cols(
  mobilization_index = calculate_mean_effects_index(Z = survey$Z,
                        outcome_mat = mobilization_index_vars, greedy = TRUE))

surveyw2 <- survey
rm(survey,mobilization_index_vars,col_action_index_vars,attitudes_index_vars)
comment(surveyw2) <- paste("Assembled in ",Sys.Date()," for replication of JOP paper.",sep="")
save(surveyw2,file="Routputs/data-w2-Making-recoded.Rda")

####################################################################################
# Declare variables for W2 (controls and outcomes)                            ######
####################################################################################

# Declare labels for control and balance variables
controls <- pre.treat <-  c("idade", 
                            "sexor", 
                            "race_bin", 
                            "cadunico",
                            "formal_pre",
                            "av_prel")

bal.labels <- c("Age",
                "Sex (male)",
                "Race (white)", 
                "Registry (Cadunico)",
                "Years in Formal Employment", 
                "Avg. Formal Wages")

####################################################################################
# Declare outcomes for analysis                                               ######
####################################################################################

outcomes1 <- c(
  "attitudes_index",
  "self_index",
  "market_index",
  "red_index")

outcomes2 <- c(
  "welfare_index",
  "placement",
  "pea",
  "inc_above1mw")

outcomes3 <- c("hap_index")
outcomes4 <- c("prosp_self") #in W2, we could use the index exp_self which also includes prosp_socio, but in W1 we don't have prosp_socio

#These definitions are needed below, in different parts of the code
uO <- length(grep("outcomes\\d",ls()))
the.fam <- paste("outcomes",1:uO,sep="")
outcomes <- do.call("c",sapply(paste("outcomes",1:uO,sep=""),"get"))
out.class <- paste("H",1:uO,sep="")
names(outcomes)<-NULL
out.selected <- which(is.element(outcomes,c("attitudes_index","welfare_index","hap_index","prosp_self")))
the.labels <-  c("Attitudes Index","Self-reliance",
                 "Market Beliefs","Redistribution",
                 "Welfare Index"
                 ,"Placement"
                 ,"PEA"
                 ,"Inc.>1MW"
                 ,"Happiness"
                 ,"Expectations")

# Check whether labels are correct
main <- rep(0,length(outcomes))
main[out.selected] <- 1
var.labels<- data.frame(outcomes,the.labels,main)
print(var.labels)

# Print summary statistics 
sumtab_raw <- summary_stats(data=subset(surveyw2,select=c("Z","D",pre.treat,outcomes, 
                                                      "tax_item","red_abstract","red_concrete",
                                                      "effortbetter","trustothers","successalone","moneyimportant",
                                                      "govind","competition",
                                                      "col_action_index",
                                                      "assoc",            
                                                      "talk_recent",
                                                      "contact_gov",                     
                                                      "contact_civsoc",  
                                                      "contact_collectively",
                                                      "effectiveness_index")))
#Include labels in most important rows (to facilitate reporting)
rownames(sumtab_raw)[3:18] <- c(bal.labels,the.labels)
rownames(sumtab_raw)[28:nrow(sumtab_raw)] <- c("Col. Action Index", "Association","Talk to Politician"
                                  ,"Petition Govt.","Petition Civl. Soc. Org."
                                  , "Claim Collectively","Effectiveness Index")
  
#Re-scaling outcomes
surveyw2.unscaled <- surveyw2
surveyw2[outcomes] <- scalev(outcomes=outcomes,the.data=surveyw2)

#Standardized Descriptive Table
sumtab_std <- summary_stats(data=subset(surveyw2,select=c("Z","D",pre.treat,outcomes))) 

out.sum.W2 <- list(sumtab.raw=sumtab_raw, sumtab.std=sumtab_std)
comment(out.sum.W2) <- paste("Estimated in ",Sys.Date(),". Summary Stats ",sep="")
save(out.sum.W2,file=paste("Routputs/out-W2-sum.RData",sep=""))

####################################################################################
# Analysis of Pooled Effects                                                  ######
# Estimation (3 estimators ITT-lin, ITT-lm, CACE-lm; 2 variates in each)      ######
# (*) Preferred estimation                                                    ######  
# Summary tables (combining selected ITT and CACE)                            ######
####################################################################################

####  ITT-Lin: two variants  ####
#out.lin       & (no controls, lottery id as covariate)
#out.linc      & (with controls, lottery id as covariate)(*)
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital")
                        ,clusters="fieldID"
                        ,FUN = est_lin, the.data=surveyw2))
rownames(tmp) <- outcomes
out.lin <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lin$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lin[get(the.fam[ii]),"HBp"] <- p.adjust(out.lin[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lin$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP


tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital",pre.treat) 
                        ,clusters="fieldID"
                        ,FUN = est_lin, the.data=surveyw2))
rownames(tmp) <- outcomes
out.linc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.linc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.linc[get(the.fam[ii]),"HBp"] <- p.adjust(out.linc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.linc$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.lin <- list(out.lin=out.lin,out.linc=out.linc)  

####  ITT-Lm: two variants  ####
#out.lin       & (no controls, lottery id as covariate)
#out.linc      & (with controls, lottery id as covariate)(*)
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= NULL
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_lm, the.data=surveyw2))
rownames(tmp) <- outcomes
out.lm <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lm$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lm[get(the.fam[ii]),"HBp"] <- p.adjust(out.lm[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lm$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= pre.treat
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_lm, the.data=surveyw2))
rownames(tmp) <- outcomes
out.lmc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmc[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmc$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

#collect results (in a list of lists)
out.itt.lm <- list(out.lm=out.lm,out.lmc=out.lmc)  

####  CACE-Lm: two variants  #### 
#out.lmcace        & (no controls, lottery id as covariate)
#out.lmcacec       & (with controls, lottery id as covariate)(*) 

tmp <- bind_rows(lapply(1:length(outcomes),  outcomes=outcomes, treat="Z"
                        ,compliance="D" 
                        ,the.data=surveyw2
                        ,fe="id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_cace)) 
rownames(tmp) <- outcomes
out.lmcace <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmcace$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmcace[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmcace[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmcace$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes),  outcomes=outcomes, treat="Z"
                        ,compliance="D" 
                        ,the.data=surveyw2
                        ,ctrls=pre.treat
                        ,fe="id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_cace)) 
rownames(tmp) <- outcomes
out.lmcacec <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmcacec$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmcacec[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmcacec[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmcacec$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

#collect results
out.cace.lm <- list(out.lmcace=out.lmcace,out.lmcacec=out.lmcacec)  

### Collect all results (list of lists)
out.all <- list(out.itt.lm=out.itt.lm,out.itt.lin=out.itt.lin,out.cace.lm=out.cace.lm)
comment(out.all) <- paste("Estimated in ",Sys.Date(),". Three estimators, two variants in each.",sep="")
save(out.all,file=paste("Routputs/out-W2-estimates.RData",sep=""))

####################################################################################
# Estimation for  Edital 03.2011 only                                      ######
####################################################################################

####  ITT-Lm: two variants  #### 
#out.lm        & (no controls, lottery id as covariate)
#out.lmc       & (with controls, lottery id as covariate)(*) 

lot03 <- surveyw2 %>% filter(id_edital == "edital03.2011")

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= NULL
                        ,FUN = est_lm_lot, the.data=lot03))
rownames(tmp) <- outcomes
out.lm <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lm$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lm[get(the.fam[ii]),"HBp"] <- p.adjust(out.lm[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lm$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c(pre.treat)
                        ,FUN = est_lm, the.data=lot03))
rownames(tmp) <- outcomes
out.lmc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmc[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmc$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.lm03 <- list(out.lm=out.lm,out.lmc=out.lmc)  

####################################################################################
# Estimation for  Edital 06.2011 only                                      ######
####################################################################################

####  ITT-Lm: two variants  #### 
#out.lm        & (no controls, lottery id as covariate)
#out.lmc       & (with controls, lottery id as covariate)(*) 

lot06 <- surveyw2 %>% filter(id_edital == "edital06.2011")

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= NULL
                        ,FUN = est_lm_lot, the.data=lot06))
rownames(tmp) <- outcomes
out.lm <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lm$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lm[get(the.fam[ii]),"HBp"] <- p.adjust(out.lm[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lm$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c(pre.treat)
                        ,FUN = est_lm, the.data=lot06))
rownames(tmp) <- outcomes
out.lmc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmc[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmc$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.lm06 <- list(out.lm=out.lm,out.lmc=out.lmc)  

### Collect all results (list of lists)
out.allattitudes_bylot <- list(out.itt.lm03=out.itt.lm03, out.itt.lm06=out.itt.lm06)
comment(out.allattitudes_bylot) <- paste("Estimated in ",Sys.Date(),". One estimator, two variants in each.",sep="")
save(out.allattitudes_bylot,file=paste("Routputs/out-W2-estimatesattitudes-bylot.RData",sep=""))


####################################################################################
# Experiences outcomes                                                        ######
# Estimation (3 estimators ITT-lin, ITT-lm, CACE-lm; 2 variants in each)      ######
# (*) Preferred estimation                                                    ######  
# Summary tables (combining selected ITT and CACE)                            ######
####################################################################################

outcomese1 <- c(
  "mobilization_index",
  "talk_cand",
  "client_support",                        
  "turnout_2016",
  "turnout_2018",
  "turnout_2020",
  "client_g")

outcomese2 <- c(
  "col_action_index",
  "assoc",            
  "talk_recent",#in pap under the "engagement" group, but moved here for consistency
  "contact_gov",                     
  "contact_civsoc",  
  "contact_collectively",
  "effectiveness_index" #includes "effectiveness_gov" and "effectiveness_self"
)

uE <- length(grep("outcomese",ls()))

the.fam <- paste("outcomese",1:uE,sep="")
outcomes <- do.call("c",lapply(paste("outcomese",1:uE,sep=""),"get"))
out.class <- paste("H",1:uE,sep="")
names(outcomes)<-NULL
out.selected <- which(is.element(outcomes,c("mobilization_index","col_action_index")))
the.labels <-  c("Mobilization Index","Talk to Candidate","Support Candidate"
                 ,"Turnout 2016","Turnout 2018","Exp. Turnout 2020","Clientelism"
                 ,"Col. Action Index", "Association","Talk to Politician"
                 ,"Petition Govt.","Petition Civl. Soc. Org."
                 , "Claim Collectively","Effectiveness Index")

# Check whether labels are correct
main <- rep(0,length(outcomes))
main[out.selected] <- 1
var.labels<- data.frame(outcomes,the.labels,main)
print(var.labels)

# Print summary statistics 
sumtab <- summary_stats(data=subset(surveyw2,select=c("Z","D", pre.treat,outcomes, "lottery1"))) 
print(round(sumtab,2))

#Re-scaling outcomes
surveyw2[outcomes] <- scalev(outcomes,surveyw2)

####  ITT-Lin: two variants  ####
#out.exp.lin       & (no controls, lottery id as covariate)
#out.exp.linc      & (with controls, lottery id as covariate)(*)

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital")
                        ,clusters="fieldID"
                        ,FUN = est_lin, the.data=surveyw2))
rownames(tmp) <- outcomes
out.exp.lin <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.exp.lin$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.exp.lin[get(the.fam[ii]),"HBp"] <- p.adjust(out.exp.lin[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.exp.lin$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital",pre.treat)
                        ,clusters="fieldID"
                        ,FUN = est_lin, the.data=surveyw2))
rownames(tmp) <- outcomes
out.exp.linc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.exp.linc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.exp.linc[get(the.fam[ii]),"HBp"] <- p.adjust(out.exp.linc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.exp.linc$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP


out.exp.itt.lin <- list(out.exp.lin=out.exp.lin,out.exp.linc=out.exp.linc)  

####  ITT-Lm: two variants  #### 
#out.exp.lm        & (no controls, lottery id as covariate)
#out.exp.lmc       & (with controls, lottery id as covariate)(*) 
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= NULL
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_lm, the.data=surveyw2))
rownames(tmp) <- outcomes
out.exp.lm <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.exp.lm$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.exp.lm[get(the.fam[ii]),"HBp"] <- p.adjust(out.exp.lm[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.exp.lm$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= pre.treat
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_lm, the.data=surveyw2))
rownames(tmp) <- outcomes
out.exp.lmc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.exp.lmc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.exp.lmc[get(the.fam[ii]),"HBp"] <- p.adjust(out.exp.lmc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.exp.lmc$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.exp.itt.lm <- list(out.exp.lm=out.exp.lm,out.exp.lmc=out.exp.lmc)  

####  CACE-Lm: twp  variants (D) #### 
#out.exp.lmcace        & (no controls, lottery id as covariate)
#out.exp.lmcacec       & (with controls, lottery id as covariate)(*)   

tmp <- bind_rows(lapply(1:length(outcomes),  outcomes=outcomes, treat="Z"
                        ,compliance="D" 
                        ,the.data=surveyw2
                        ,fe="id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_cace)) 
rownames(tmp) <- outcomes
out.exp.lmcace <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.exp.lmcace$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.exp.lmcace[get(the.fam[ii]),"HBp"] <- p.adjust(out.exp.lmcace[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.exp.lmcace$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP


tmp <- bind_rows(lapply(1:length(outcomes),  outcomes=outcomes, treat="Z"
                        ,compliance="D" 
                        ,the.data=surveyw2
                        ,ctrls=pre.treat
                        ,fe="id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_cace)) 
rownames(tmp) <- outcomes
out.exp.lmcacec <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.exp.lmcacec$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.exp.lmcacec[get(the.fam[ii]),"HBp"] <- p.adjust(out.exp.lmcacec[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}    
out.exp.lmcacec$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP


out.exp.cace.lm <- list(out.exp.lmcace=out.exp.lmcace,out.exp.lmcacec=out.exp.lmcacec)  

### Collect all results (list of lists)
out.exp.all <- list(out.exp.itt.lm=out.exp.itt.lm,
                    out.exp.itt.lin=out.exp.itt.lin,
                    out.exp.cace.lm=out.exp.cace.lm)

comment(out.exp.all) <- paste("Estimated in ",Sys.Date(),". Three estimators, two variants in each.",sep="")
save(out.exp.all,file=paste("Routputs/out-W2-estimates-experiences.RData",sep=""))

################ Edital 03.2011

####  ITT-Lm: two variants  #### 
#out.lm        & (no controls, lottery id as covariate)
#out.lmc       & (with controls, lottery id as covariate)

lot03 <- surveyw2 %>% filter(id_edital == "edital03.2011")

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= NULL
                        ,FUN = est_lm_lot, the.data=lot03))
rownames(tmp) <- outcomes
out.lm <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lm$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lm[get(the.fam[ii]),"HBp"] <- p.adjust(out.lm[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lm$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c(pre.treat)
                        ,FUN = est_lm, the.data=lot03))
rownames(tmp) <- outcomes
out.lmc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmc[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmc$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.lm03 <- list(out.lm=out.lm,out.lmc=out.lmc)  

################ Edital 06.2011

####  ITT-Lm: two variants  #### 
#out.lm        & (no controls, lottery id as covariate)
#out.lmc       & (with controls, lottery id as covariate)

lot06 <- surveyw2 %>% filter(id_edital == "edital06.2011")

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= NULL
                        ,FUN = est_lm_lot, the.data=lot06))
rownames(tmp) <- outcomes
out.lm <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lm$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lm[get(the.fam[ii]),"HBp"] <- p.adjust(out.lm[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lm$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c(pre.treat)
                        ,FUN = est_lm, the.data=lot06))
rownames(tmp) <- outcomes
out.lmc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmc[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmc$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.lm06 <- list(out.lm=out.lm,out.lmc=out.lmc)  

### Collect all results (list of lists)
out.allexperiences_bylot <- list(out.itt.lm03=out.itt.lm03, out.itt.lm06=out.itt.lm06)
comment(out.allexperiences_bylot) <- paste("Estimated in ",Sys.Date(),". One estimator, two variants in each.",sep="")
save(out.allexperiences_bylot,file=paste("Routputs/out-W2-estimatesexperiences-bylot.RData",sep=""))

####################################################################################
# Balance by Lottery                                                          ######
####################################################################################

the.lotteries <- unique(surveyw2$id_edital)
# Loop over lotteries  #
bal.stats <- list() #object to store balance results
for(ii in 1:length(the.lotteries)){ 
  print(the.lotteries[ii])
  lot <- subset(surveyw2,id_edital==the.lotteries[ii])
  #Implements an F-test
  the.formula <- formula(paste("Z~",paste(pre.treat,collapse="+")))
  m1 <- lm_robust(the.formula, 
                  data = lot, 
                  se_type = "stata")
  #Standard F-test 
  the.wald <- lmtest::waldtest(m1,  test = c("F"))
  print(the.wald)
  # Compute the p-value of the F-tests based on the permutation distribution of W
  W_obs <- lmtest::waldtest(m1,  test = c("F"))[2,3] #store f-test for boostrapped p-values 
  N <- nrow(lot)
  sims <- 1000
  n_treat <- as.numeric(table(lot$Z)[2])
  W_sims <- numeric(sims)
  set.seed(30033)
  for(i in 1:sims){
    lot$Z_sim <- complete_ra(N, n_treat)
    fit_sim1 <- lm_robust(formula(paste("Z_sim~",paste(pre.treat,collapse="+"))), 
                          data = lot, #weights = 1/sample_weights, 
                          se_type = "stata")
    W_sims[i] <- lmtest::waldtest(fit_sim1,  test = c("F"))[2,3]
  }
  p.permutation <- mean(W_sims >= W_obs)
  # Bootrsapped p-value o F-test
  cat("Simulated p-value:",p.permutation,"\n")
  # Compute balance variable by variable lmr
  ballmr <- bind_rows(lapply(1:length(pre.treat), outcomes=pre.treat, FUN = est_lm,
                             ctrls = NULL, the.data=lot)) 
  rownames(ballmr) <- pre.treat
  print(ballmr)
  #assemble objects with balance stats
  bal.stats[[the.lotteries[ii]]] <- list(N=table(lot$Z),wald=the.wald,p.permutation=p.permutation,
                                         balr = ballmr)
} #end loop for lottery
save(bal.stats,file=paste("Routputs/out-W2-balancebylottery.RData",sep=""))

####################################################################################
# Balance Pooled                                                         ###########
####################################################################################
# Loop over lotteries  #
pol.bal.stats <- list() #object to store balance results
#Implments an F-test
the.formula <- formula(paste("Z~",paste(pre.treat,collapse="+")))
m1 <- lm_robust(the.formula, 
                data = surveyw2, clusters = fieldID,
                fixed_effects =~id_edital, 
                se_type = "stata")
#Standard F-test 
ftest <- c(print(summary(m1)$proj_fstatistic), pf(as.numeric(summary(m1)$proj_fstatistic[1]),
                                               as.numeric(summary(m1)$proj_fstatistic[2]),
                                               as.numeric(summary(m1)$proj_fstatistic[3]),lower.tail=F))
# Compute the p-value of the F-tests based on the permutation distribution of W
W_obs <- as.numeric(summary(m1)$proj_fstatistic[1]) #store f-test for bootstrapped p-values 
N <- nrow(surveyw2)
sims <- 1000
n_treat <- as.numeric(table(surveyw2$Z)[2])
W_sims <- numeric(sims)
set.seed(30033)
for(i in 1:sims){
  surveyw2$Z_sim <- complete_ra(N, n_treat)
  fit_sim1 <- lm_robust(formula(paste("Z_sim~",paste(pre.treat,collapse="+"))), 
                        data = surveyw2,
                        cluster = fieldID, 
                        fixed_effects =~id_edital,
                        se_type = "stata")
  W_sims[i] <- as.numeric(summary(fit_sim1)$proj_fstatistic[1])
}
p.permutation <- mean(W_sims >= W_obs)
# Bootstrapped p-value o F-test
cat("Simulated p-value:",p.permutation,"\n")

# Compute balance variable by variable lm_robust
ballmr <- bind_rows(lapply(1:length(pre.treat), outcomes=pre.treat, FUN = est_lm,
                           ctrls= NULL,
                           fe = "id_edital", clusters = "fieldID",
                           the.data=surveyw2))
rownames(ballmr) <- pre.treat
print(ballmr)

# Compute balance variable by variable Lin
ballm <- bind_rows(lapply(1:length(pre.treat), outcomes=pre.treat, FUN = est_lin,
                          ctrls = "id_edital", clusters = "fieldID",
                          the.data=surveyw2))
rownames(ballm) <- pre.treat
print(ballm)
#assemble objects with balance stats

pol.bal.stats <- list(N=table(surveyw2$Z),wald=ftest,p.permutation=p.permutation,bal=ballm, balr=ballmr)
save(pol.bal.stats,file=paste("Routputs/out-W2-balancepooled.RData",sep=""))

####################################################################################
# Attrition                                                         ################
# Note that different data is used                                  ################
####################################################################################

#Data for attrition analysis
load("Data/W2_attrition_public.Rda")
load("Data/W2_attrition_overall_public.Rda")

### Answered by treatment and control (overall contacted -- may or may not have answered)

attrition_overall <- lm_lin(interviewed ~ treat_ind, cluster = fieldID,
                            se_type = "stata", covariates = ~ id_edital,
                            data = W2_attrition_overall)

attrition_overallfe <- lm_robust(interviewed ~ treat_ind, cluster = fieldID,
                                 se_type = "stata", fixed_effects = ~ id_edital,
                                 data = W2_attrition_overall)


### Answered by treatment and control (among those who answered)

attrition_answered <- lm_lin(interviewed ~ treat_ind, cluster = fieldID,
                             se_type = "stata", covariates = ~ id_edital,
                             data = W2_attrition)

attrition_answeredfe <- lm_robust(interviewed ~ treat_ind, cluster = fieldID,
                                  se_type = "stata", fixed_effects = ~ id_edital,
                                  data = W2_attrition)

##### Remove missing for Wald Test
W2_attrition_overall2 <- W2_attrition_overall %>% filter(!is.na(sexor))
W2_attrition2 <- W2_attrition %>% filter(!is.na(sexor))

#Including interactions of covariates
attrition_overall_covi <- lm_robust(interviewed ~ treat_ind + formal_pre_prop + sexor + av_prel + 
                                      treat_ind*formal_pre_prop + treat_ind*sexor + treat_ind*av_prel, 
                                    se_type = "stata", fixed_effects = ~ id_edital, clusters = fieldID,
                                    data = W2_attrition_overall2)

attrition_answered_covi <- lm_robust(interviewed ~ treat_ind + formal_pre_prop + sexor + av_prel + 
                                       treat_ind*formal_pre_prop + treat_ind*sexor + treat_ind*av_prel, 
                                     se_type = "stata", fixed_effects = ~ id_edital, clusters = fieldID,
                                     data = W2_attrition2)

### No interactions

attrition_overall_cov <- lm_robust(interviewed ~ treat_ind + formal_pre_prop + sexor + av_prel,
                                   se_type = "stata", fixed_effects = ~ id_edital, clusters = fieldID,
                                   data = W2_attrition_overall2)

attrition_answered_cov <- lm_robust(interviewed ~ treat_ind + formal_pre_prop + sexor + av_prel,
                                    se_type = "stata", fixed_effects = ~ id_edital, clusters = fieldID,
                                    data = W2_attrition2)

#Joint hypotheses test
wald_interviewed <- waldtest(attrition_overall_covi, attrition_overall_cov)
wald_interviewed_pickedup <- waldtest(attrition_answered_covi, attrition_answered_cov)

attrition.stats.W2 <- list(wald_interviewed = wald_interviewed, 
                        wald_interviewed_pickedup = wald_interviewed_pickedup, 
                        attrition_overall = attrition_overall, 
                        attrition_overall_fe = attrition_overallfe, 
                        attrition_answered = attrition_answered, 
                        attrition_answered_fe = attrition_answeredfe)

save(attrition.stats.W2,file=paste("Routputs/out-W2-attrition.RData",sep=""))

####################################################################################
# Comparing Admin Analysis to Survey Analysis                       ################
# Note that different data is used                                  ################
####################################################################################

# Load the dataset for comparison 
load("Data/W2_attrition_admin_overall_public.Rda")

#Admin analysis
admin_av <-  lm_robust(av_postl ~ treat_indf, cluster = fieldID,
                       se_type = "stata"
                       , fixed_effects =~id_edital,
                       data = data_admin)

admin_formal <- lm_robust(formal_post ~ treat_indf, cluster = fieldID,
                          se_type = "stata", fixed_effects =~id_edital,
                          data = data_admin)
#Survey analysis
survey_av <- lm_robust(av_postl ~ Z, cluster = fieldID,
                       se_type = "stata", fixed_effects =~id_edital,
                       data = surveyw2)

survey_formal <- lm_robust(formal_post ~ Z, cluster = fieldID,
                           se_type = "stata", fixed_effects =~id_edital,
                           data = surveyw2)

attrition.stats.W2.admin <- list(admin_av = admin_av, 
                                 admin_formal = admin_formal, 
                                 survey_av = survey_av, 
                                 survey_formal = survey_formal)

save(attrition.stats.W2.admin,file=paste("Routputs/out-W2-attrition-admin.RData",sep=""))

####################################################################################
# Exporatory unplanned analysis
# Outcomes are the items in attitude indices (Di Tella's)                     ######
# Estimating only two variants of each estimator,                             ######
# Estimation (3 estimators ITT-lin, ITT-lm, CACE-lm)                          ######
# Summary tables (combining selected ITT and CACE)                            ######
####################################################################################

outcomesu1 <- c(
  "effortbetter",
  "trustothers",
  "successalone",
  "moneyimportant")

outcomesu2 <- c(
  "tax_item",
  "red_abstract",
  "red_concrete")

outcomesu3 <- c(
  "govind",
  "competition")

outcomesu4 <- c(
  "prosp_self",
  "prosp_socio")

outcomesu5 <- c(#parts of indices in collective action
  "contact_gov",                     
  "contact_civsoc",
  "effectiveness_gov",               
  "effectiveness_self",
  "contact_transport_v2",               
  "contact_healtheduc_v2",
  "contact_infrastructure_v2",          
  "contact_security_v2"                 
)

outcomesu6 <- c(
  "current_rel",
  "av_postl",                        
  "formal_post",
  "commute")

outcomesu7 <- c(
  "netwk_help",
  "netwk_size_d",
  "retrosp_econself",
  "retrosp_gov",
  "retrosp_talenteffort")

outcomesu8 <- c(
  "satisfh_index",
  "satisf_size",
  "satisf_conf",
  "satisf_cost",
  "satisfn_index",
  "satisfn_violence",
  "satisfn_transp",
  "satisfn_health",
  "satisfn_educ",
  "hope_better_life",
  "would_move")

w2.attitudes <- subset(surveyw2,select=c(outcomesu1,outcomesu2,outcomesu3,pre.treat
                                         ,"fieldID","Z","D","id_edital"))
save(w2.attitudes,file="Routputs/data-w2-attitudes.RData")

uG <- length(grep("outcomesu",ls()))
the.fam <- paste("outcomesu",1:uG,sep="")
outcomes <- do.call("c",sapply(paste("outcomesu",1:uG,sep=""),"get"))
out.class <- paste("H",1:uG,sep="")
names(outcomes)<-NULL
out.selected <- c(1,5,8,10,12,18,22,29)
names(outcomes)<-NULL
the.labels <-  c("Effort Better", "Trust Others", "Success Alone", 
                 "Money Important", 
                 "Tax Item", "Redistribution (Abstract)", 
                 "Redistribution (Concrete)","Govt vs. Individual", 
                 "Comp. is Good",
                 "Expect. Self","Expect. Socio",
                 "Claim-making Govt.", 
                 "Claim-making CSO", "Effectiv. of Claim", "Effectiv. of Change",
                 "Claim - Transport.", "Claim - Health/Educ.", 
                 "Claim - Infra.", "Claim - Security",
                 "Rel. Econ. Situation", "Avg. Income", "Formal Job", 
                 "Commute (dummy)", 
                 "Help from network", "Network size (dummy)",
                 "Retrosp.","Talent Effort","Government",
                 "Satisf. Home (index)",
                 "Satisf. Size",
                 "Satisf. Conf.",
                 "Satisf. Cost",
                 "Satisf. N'hood (index)",
                 "Satisf. Violence",
                 "Satisf. Transp.",
                 "Satisf. Health",
                 "Satisf. Educ",
                 "Better Life","Would Move"
)

# Check whether labels are correct
print(data.frame(outcomes,the.labels))

# Print summary statistics 
sumtab <- summary_stats(data=subset(surveyw2,select=c("Z","D",pre.treat,outcomes))) 
print(round(sumtab,2))

#Re-scaling outcomes
surveyw2[outcomes] <- scalev(outcomes,surveyw2)

####  ITT-Lin: two variants only for unplanned outcomes  ####
#out.lin       & (no age and no controls, edital as covariate and survey weights)
#out.linc      & (with age and controls, edital as covariate and survey weights)(*)
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital")
                        ,clusters="fieldID"
                        ,FUN = est_lin, the.data=surveyw2))
rownames(tmp) <- outcomes
out.lin <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lin$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lin[get(the.fam[ii]),"HBp"] <- p.adjust(out.lin[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lin$HBp[out.selected] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital",pre.treat)
                        ,clusters="fieldID"
                        ,FUN = est_lin, the.data=surveyw2))
rownames(tmp) <- outcomes
out.linc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.linc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.linc[get(the.fam[ii]),"HBp"] <- p.adjust(out.linc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.linc$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.un.lin <- list(out.lin=out.lin,out.linc=out.linc)  

####  ITT-Lm: two variants  #### 
#out.lm        & (no age and no controls, edital as covariate and survey weights)
#out.lmc       & (with age and controls, edital as covariate and survey weights)(*)
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= NULL
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_lm, the.data=surveyw2))
rownames(tmp) <- outcomes
out.lm <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lm$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lm[get(the.fam[ii]),"HBp"] <- p.adjust(out.lm[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lm$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c(pre.treat)
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_lm, the.data=surveyw2))
rownames(tmp) <- outcomes
out.lmc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmc[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmc$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.un.lm <- list(out.lm=out.lm,out.lmc=out.lmc)  

####  CACE-Lm: two variants (D, admin main) #### 
#out.lmcace        & (no age and no controls, edital as covariate and survey weights)
#out.lmcacec       & (with age and controls, edital as covariate and survey weights)(*) 

tmp <- bind_rows(lapply(1:length(outcomes),  outcomes=outcomes, treat="Z"
                        ,compliance="D" 
                        ,the.data=surveyw2
                        ,fe="id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_cace)) 
rownames(tmp) <- outcomes
out.lmcace <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmcace$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmcace[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmcace[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmcace$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes),  outcomes=outcomes, treat="Z"
                        ,compliance="D"  
                        ,the.data=surveyw2
                        ,ctrls=pre.treat
                        ,fe="id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_cace)) 
rownames(tmp) <- outcomes
out.lmcacec <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmcacec$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmcacec[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmcacec[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}    
out.lmcacec$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.cace.un.lm <- list(out.lmcace=out.lmcace,out.lmcacec=out.lmcacec)  

### Collect all results (list of lists)
out.un.all <- list(out.itt.un.lm=out.itt.un.lm,
                   out.itt.un.lin=out.itt.un.lin,
                   out.cace.un.lm=out.cace.un.lm)
comment(out.un.all) <- paste("Estimated in ",Sys.Date(),". Two estimators, two variants in each.",sep="")
save(out.un.all,file=paste("Routputs/out-W2-estimates-uplanned.RData",sep=""))

####################################################################################
# Estimates using IPW approach
# Attitudes and experience outcomes
####################################################################################

#Getting all outcomes
outcomes1 <- c(
  "attitudes_index",
  "self_index",
  "market_index",
  "red_index"
)
outcomes2 <- c(
  "welfare_index",
  "placement",
  "pea",
  "inc_above1mw"
)
outcomes3 <- c("hap_index")
outcomes4 <- c("prosp_self") 

#These definitions are needed below, in different parts of the code
uO <- length(grep("outcomes\\d",ls()))
the.fam <- paste("outcomes",1:uO,sep="")
outcomes <- do.call("c",sapply(paste("outcomes",1:uO,sep=""),"get"))
the.labels <-  c("Attitudes Index","Individualism",
                 "Market Beliefs","Redistribution",
                 "Welfare Index"
                 ,"Placement"
                 ,"PEA"
                 ,"Inc.>1MW"
                 ,"Happiness"
                 ,"Expectations"
)

outcomese2 <- c(
  "col_action_index",
  "assoc",            
  "talk_recent",
  "contact_gov",                     
  "contact_civsoc",  
  "contact_collectively",
  "effectiveness_index" 
)

the.labels.all <- c("Attitudes Index","Individualism",
                    "Market Beliefs","Redistribution",
                    "Welfare Index"
                    ,"Placement"
                    ,"PEA"
                    ,"Inc.>1MW"
                    ,"Happiness"
                    ,"Expectations", 
                    "Col. Action Index", "Association","Talk to Politician"
                    ,"Petition Govt.","Petition Civl. Soc. Org."
                    , "Claim Collectively","Effectiveness Index")

#Attitudes with controls
tmp_ipw <- bind_rows(lapply(1:length(outcomes)
                            ,outcomes= outcomes
                            ,ctrls= pre.treat
                            ,clusters="fieldID"
                            , w = surveyw2$ipw
                            ,FUN = est_ipw, the.data=surveyw2))
rownames(tmp_ipw) <- outcomes
out.ipwc <- as.data.frame(tmp_ipw)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                

#Attitudes without controls 
tmp_ipw <- bind_rows(lapply(1:length(outcomes)
                            ,outcomes= outcomes
                            ,ctrls= NULL
                            ,clusters="fieldID"
                            , w = surveyw2$ipw
                            ,FUN = est_ipw, the.data=surveyw2))
rownames(tmp_ipw) <- outcomes
out.ipw <- as.data.frame(tmp_ipw)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                

#Collective action with controls
tmp_ipw_m <- bind_rows(lapply(1:length(outcomese2)
                              ,outcomes= outcomese2
                              ,ctrls= pre.treat
                              ,clusters="fieldID"
                              , w = surveyw2$ipw
                              ,FUN = est_ipw, the.data=surveyw2))
rownames(tmp_ipw_m) <- outcomese2
out.ipw_mc <- as.data.frame(tmp_ipw_m)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                

#Collective action without controls
tmp_ipw_m <- bind_rows(lapply(1:length(outcomese2)
                              ,outcomes= outcomese2
                              ,ctrls= NULL
                              ,clusters="fieldID"
                              , w = surveyw2$ipw
                              ,FUN = est_ipw, the.data=surveyw2))
rownames(tmp_ipw_m) <- outcomese2
out.ipw_m <- as.data.frame(tmp_ipw_m)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                


out.all <- list(out.itt.ipwc=out.ipwc, out.itt.ipw_mc = out.ipw_mc, 
                out.itt.ipw=out.ipw, out.itt.ipw_m = out.ipw_m)
comment(out.all) <- paste("Estimated in ",Sys.Date(),"IPW estimates with and without controls.",sep="")
save(out.all,file=paste("Routputs/out-W2-estimates-ipw.RData",sep=""))


####################################################################################
# Analysis of W2 Sample Matched to Resemble W1
# We re-estimates selected Pooled Effects                                     ######
# Estimation (2 estimators ITT-lin, ITT-lm)                                   ######
# Summary tables (combining selected ITT)                                     ######
####################################################################################

load("Routputs/matching_weights.RData")
matching_weights <- subset(matching_weights,wave2==1)
w2matched <-  subset(matching_weights,wave2==1) %>% left_join(surveyw2, by = c("fieldID","id_edital"))

# Declare labels for control and balance variables
controls <- pre.treat <-  c("idade", 
                            "sexor", 
                            "race_bin", 
                            "cadunico",
                            "formal_pre",
                            "av_prel")

bal.labels <- c("Age",
                "Sex (male)",
                "Race (white)", 
                "Registry (Cadunico)",
                "Years in Formal Employment", 
                "Avg. Formal Wages")

outcomes1 <- c(
  "attitudes_index",
  "self_index",
  "market_index",
  "red_index")

outcomes2 <- c(
  "welfare_index",
  "placement",
  "pea",
  "inc_above1mw")

outcomes3 <- c("hap_index")
outcomes4 <- c("prosp_self") #in W2, we could use the index exp_self which also includes prosp_socio, but in W1 we don't have prosp_socio

#These definitions are needed below, in different parts of the code
uO <- length(grep("outcomes\\d",ls()))
the.fam <- paste("outcomes",1:uO,sep="")
outcomes <- do.call("c",sapply(paste("outcomes",1:uO,sep=""),"get"))
out.class <- paste("H",1:uO,sep="")
names(outcomes)<-NULL
out.selected <- which(is.element(outcomes,c("attitudes_index","welfare_index","hap_index","prosp_self")))
the.labels <-  c("Attitudes Index","Self-reliance",
                 "Market Beliefs","Redistribution",
                 "Welfare Index"
                 ,"Placement"
                 ,"PEA"
                 ,"Inc.>1MW"
                 ,"Happiness"
                 ,"Expectations")

# Check whether labels are correct
main <- rep(0,length(outcomes))
main[out.selected] <- 1
var.labels<- data.frame(outcomes,the.labels,main)
print(var.labels)

####  ITT-Lin: two variants  ####
#out.lin       & (no age, no controls, with edital as covariates and survey weights)
#out.linc      & (with age and controls, with edital as covariates and survey weights)(*)
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital")
                        ,clusters="fieldID"
                        ,FUN = est_lin, the.data=w2matched
                        ,w=w2matched$weights))
rownames(tmp) <- outcomes
out.linw <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.linw$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.linw[get(the.fam[ii]),"HBp"] <- p.adjust(out.linw[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.linw$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital",pre.treat)
                        ,clusters="fieldID"
                        ,FUN = est_lin, the.data=w2matched
                        ,w=w2matched$weights))
rownames(tmp) <- outcomes
out.lincw <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lincw$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lincw[get(the.fam[ii]),"HBp"] <- p.adjust(out.lincw[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lincw$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.linw <- list(out.linw=out.linw,out.lincw=out.lincw)  

####  ITT-Lm: two variants  #### 
#out.lm        & (no age, no controls, with edital as covariates and survey weights)
#out.lmc       & (with age and controls, with edital as covariates and survey weights)(*) 
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= NULL
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_lm, the.data=w2matched
                        ,w=w2matched$weights))
rownames(tmp) <- outcomes
out.lmw <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmw$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmw[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmw[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmw$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= pre.treat
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_lm, the.data=w2matched
                        ,w=w2matched$weights))
rownames(tmp) <- outcomes
out.lmcw <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmcw$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmcw[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmcw[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmcw$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.lmw <- list(out.lmw=out.lmw,out.lmcw=out.lmcw)  

### Collect all results (list of lists)
out.allw <- list(out.itt.lmw=out.itt.lmw,out.itt.linw=out.itt.linw)
comment(out.allw) <- paste("Estimated in ",Sys.Date(),". Using dataset matched between waves.",sep="")
save(out.allw,file=paste("Routputs/out-W2-matchedestimates.RData",sep=""))

